Arrange directory structure for new I/O drivers.
40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/evtchn.c
3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.26-sparse/arch/xen/drivers/network/Makefile
3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
-4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
-4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.c
-4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.h
-4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vbd.c
-405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
-405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/vnetif.c
+4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
+4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
+4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
+4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
+4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
+4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
+4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
+4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
+405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
+405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile
4075806dE5mQwlVUf8-t3YXjiMMWDQ xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.26-sparse/arch/xen/kernel/entry.S
SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib
SUBDIRS += arch/xen/drivers/console
SUBDIRS += arch/xen/drivers/evtchn
-ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ifdef CONFIG_XEN_NEWIO
SUBDIRS += arch/xen/drivers/vblkif
SUBDIRS += arch/xen/drivers/vnetif
else
CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o
CORE_FILES += arch/xen/drivers/evtchn/drv.o
CORE_FILES += arch/xen/drivers/console/drv.o
-ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ifdef CONFIG_XEN_NEWIO
CORE_FILES += arch/xen/drivers/vblkif/drv.o
CORE_FILES += arch/xen/drivers/vnetif/drv.o
else
mainmenu_option next_comment
comment 'Xen'
bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
-bool 'Support for direct physical device access' CONFIG_XEN_PHYSDEV_ACCESS
+bool 'New I/O model (no drivers in Xen) [EXPERIMENTAL]' CONFIG_XEN_NEWIO
+if [ "$CONFIG_XEN_NEWIO" = "y" ]; then
+ bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
+fi
endmenu
# The IBM S/390 patch needs this.
define_bool CONFIG_NO_IDLE_HZ y
source net/Config.in
fi
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "n" ]; then
- #
- # Block device driver configuration
- #
- mainmenu_option next_comment
- comment 'Block devices'
- tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
- dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
- tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
- if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
- int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
- fi
- dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
- bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
- bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD
- define_bool CONFIG_BLK_DEV_HD n
- endmenu
-else
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
mainmenu_option next_comment
comment 'ATA/IDE/MFM/RLL support'
# input before char - char/joystick depends on it. As does USB.
#
source drivers/input/Config.in
+else
+ #
+ # Block device driver configuration
+ #
+ mainmenu_option next_comment
+ comment 'Block devices'
+ tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
+ dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
+ tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
+ if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
+ int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
+ fi
+ dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
+ bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
+ bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD
+ define_bool CONFIG_BLK_DEV_HD n
+ endmenu
fi
source drivers/char/Config.in
# Xen
#
CONFIG_XEN_PRIVILEGED_GUEST=y
-# CONFIG_XEN_PHYSDEV_ACCESS is not set
+# CONFIG_XEN_NEWIO is not set
CONFIG_NO_IDLE_HZ=y
#
# Xen
#
CONFIG_XEN_PRIVILEGED_GUEST=y
+CONFIG_XEN_NEWIO=y
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_NO_IDLE_HZ=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
#
# General setup
#
# SCTP Configuration (EXPERIMENTAL)
#
-CONFIG_IPV6_SCTP__=y
# CONFIG_IP_SCTP is not set
# CONFIG_ATM is not set
# CONFIG_VLAN_8021Q is not set
CONFIG_WDC_ALI15X3=y
CONFIG_BLK_DEV_AMD74XX=y
CONFIG_AMD74XX_OVERRIDE=y
+# CONFIG_BLK_DEV_ATIIXP is not set
CONFIG_BLK_DEV_CMD64X=y
CONFIG_BLK_DEV_TRIFLEX=y
CONFIG_BLK_DEV_CY82C693=y
# CONFIG_BLK_DEV_ATARAID is not set
# CONFIG_BLK_DEV_ATARAID_PDC is not set
# CONFIG_BLK_DEV_ATARAID_HPT is not set
+# CONFIG_BLK_DEV_ATARAID_MEDLEY is not set
# CONFIG_BLK_DEV_ATARAID_SII is not set
#
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
+# CONFIG_FORCEDETH is not set
# CONFIG_NE3210 is not set
# CONFIG_ES3210 is not set
# CONFIG_8139CP is not set
+
O_TARGET := drv.o
-obj-y := block.o vbd.o
+
+subdir-y += frontend
+obj-y += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o
+
include $(TOPDIR)/Rules.make
--- /dev/null
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
+++ /dev/null
-/******************************************************************************
- * block.c
- *
- * Xenolinux virtual block-device driver.
- *
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- */
-
-#include "block.h"
-#include <linux/blk.h>
-#include <linux/cdrom.h>
-#include <linux/tqueue.h>
-#include <linux/sched.h>
-#include <scsi/scsi.h>
-
-#include <linux/interrupt.h>
-
-typedef unsigned char byte; /* from linux/ide.h */
-
-#define STATE_ACTIVE 0
-#define STATE_SUSPENDED 1
-#define STATE_CLOSED 2
-static unsigned int state = STATE_SUSPENDED;
-
-/* Dynamically-mapped IRQs. */
-static int xlblk_response_irq, xlblk_update_irq;
-
-static blk_ring_t *blk_ring;
-static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
-static BLK_RING_IDX req_prod; /* Private request producer. */
-
-/* We plug the I/O ring if the driver is suspended or if the ring is full. */
-#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
- (state != STATE_ACTIVE))
-
-
-/*
- * Request queues with outstanding work, but ring is currently full.
- * We need no special lock here, as we always access this with the
- * io_request_lock held. We only need a small maximum list.
- */
-#define MAX_PENDING 8
-static request_queue_t *pending_queues[MAX_PENDING];
-static int nr_pending;
-
-static kdev_t sg_dev;
-static int sg_operation = -1;
-static unsigned long sg_next_sect;
-#define DISABLE_SCATTERGATHER() (sg_operation = -1)
-
-static inline void signal_requests_to_xen(void)
-{
- block_io_op_t op;
-
- DISABLE_SCATTERGATHER();
- blk_ring->req_prod = req_prod;
-
- op.cmd = BLOCK_IO_OP_SIGNAL;
- HYPERVISOR_block_io_op(&op);
- return;
-}
-
-
-/*
- * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
- *
- * Schedule a task for keventd to run, which will update the VBDs and perform
- * the corresponding updates to our view of VBD state, so the XenoLinux will
- * respond to changes / additions / deletions to the set of VBDs automatically.
- */
-static struct tq_struct update_tq;
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
-}
-static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
-}
-
-
-int xen_block_open(struct inode *inode, struct file *filep)
-{
- short xldev = inode->i_rdev;
- struct gendisk *gd = get_gendisk(xldev);
- xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
- short minor = MINOR(xldev);
-
- if ( gd->part[minor].nr_sects == 0 )
- {
- /*
- * Device either doesn't exist, or has zero capacity; we use a few
- * cheesy heuristics to return the relevant error code
- */
- if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
- ((minor & (gd->max_p - 1)) != 0) )
- {
- /*
- * We have a real device, but no such partition, or we just have a
- * partition number so guess this is the problem.
- */
- return -ENXIO; /* no such device or address */
- }
- else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
- {
- /* This is a removable device => assume that media is missing. */
- return -ENOMEDIUM; /* media not present (this is a guess) */
- }
- else
- {
- /* Just go for the general 'no such device' error. */
- return -ENODEV; /* no such device */
- }
- }
-
- /* Update of usage count is protected by per-device semaphore. */
- disk->usage++;
-
- return 0;
-}
-
-
-int xen_block_release(struct inode *inode, struct file *filep)
-{
- xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-
- /*
- * When usage drops to zero it may allow more VBD updates to occur.
- * Update of usage count is protected by a per-device semaphore.
- */
- if ( --disk->usage == 0 )
- {
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
- }
-
- return 0;
-}
-
-
-int xen_block_ioctl(struct inode *inode, struct file *filep,
- unsigned command, unsigned long argument)
-{
- kdev_t dev = inode->i_rdev;
- struct hd_geometry *geo = (struct hd_geometry *)argument;
- struct gendisk *gd;
- struct hd_struct *part;
- int i;
-
- /* NB. No need to check permissions. That is done for us. */
-
- DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long) argument, dev);
-
- gd = get_gendisk(dev);
- part = &gd->part[MINOR(dev)];
-
- switch ( command )
- {
- case BLKGETSIZE:
- DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
- return put_user(part->nr_sects, (unsigned long *) argument);
-
- case BLKGETSIZE64:
- DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
- (u64)part->nr_sects * 512);
- return put_user((u64)part->nr_sects * 512, (u64 *) argument);
-
- case BLKRRPART: /* re-read partition table */
- DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
- return xen_block_revalidate(dev);
-
- case BLKSSZGET:
- return hardsect_size[MAJOR(dev)][MINOR(dev)];
-
- case BLKBSZGET: /* get block size */
- DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
- break;
-
- case BLKBSZSET: /* set block size */
- DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
- break;
-
- case BLKRASET: /* set read-ahead */
- DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
- break;
-
- case BLKRAGET: /* get read-ahead */
- DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET);
- break;
-
- case HDIO_GETGEO:
- /* note: these values are complete garbage */
- DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
- if (!argument) return -EINVAL;
- if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
- if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
- if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
- if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
- return 0;
-
- case HDIO_GETGEO_BIG:
- /* note: these values are complete garbage */
- DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
- if (!argument) return -EINVAL;
- if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
- if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
- if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
- if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
- return 0;
-
- case CDROMMULTISESSION:
- DPRINTK("FIXME: support multisession CDs later\n");
- for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
- if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
- return 0;
-
- case SCSI_IOCTL_GET_BUS_NUMBER:
- DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
- return -ENOSYS;
-
- default:
- printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
- return -ENOSYS;
- }
-
- return 0;
-}
-
-/* check media change: should probably do something here in some cases :-) */
-int xen_block_check(kdev_t dev)
-{
- DPRINTK("xen_block_check\n");
- return 0;
-}
-
-int xen_block_revalidate(kdev_t dev)
-{
- struct block_device *bd;
- struct gendisk *gd;
- xl_disk_t *disk;
- unsigned long capacity;
- int i, rc = 0;
-
- if ( (bd = bdget(dev)) == NULL )
- return -EINVAL;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((gd = get_gendisk(dev)) == NULL) ||
- ((disk = xldev_to_xldisk(dev)) == NULL) ||
- ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- if ( disk->usage > 1 )
- {
- rc = -EBUSY;
- goto out;
- }
-
- /* Only reread partition table if VBDs aren't mapped to partitions. */
- if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
- {
- for ( i = gd->max_p - 1; i >= 0; i-- )
- {
- invalidate_device(dev+i, 1);
- gd->part[MINOR(dev+i)].start_sect = 0;
- gd->part[MINOR(dev+i)].nr_sects = 0;
- gd->sizes[MINOR(dev+i)] = 0;
- }
-
- grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
- }
-
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
-}
-
-
-/*
- * hypervisor_request
- *
- * request block io
- *
- * id: for guest use only.
- * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
- */
-static int hypervisor_request(unsigned long id,
- int operation,
- char * buffer,
- unsigned long sector_number,
- unsigned short nr_sectors,
- kdev_t device)
-{
- unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
- struct gendisk *gd;
- blk_ring_req_entry_t *req;
- struct buffer_head *bh;
-
- if ( unlikely(nr_sectors >= (1<<9)) )
- BUG();
- if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
- BUG();
-
- if ( unlikely(state == STATE_CLOSED) )
- return 1;
-
- switch ( operation )
- {
-
- case XEN_BLOCK_READ:
- case XEN_BLOCK_WRITE:
- gd = get_gendisk(device);
-
- /*
- * Update the sector_number we'll pass down as appropriate; note that
- * we could sanity check that resulting sector will be in this
- * partition, but this will happen in xen anyhow.
- */
- sector_number += gd->part[MINOR(device)].start_sect;
-
- /*
- * If this unit doesn't consist of virtual (i.e., Xen-specified)
- * partitions then we clear the partn bits from the device number.
- */
- if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
- GENHD_FL_VIRT_PARTNS) )
- device &= ~(gd->max_p - 1);
-
- if ( (sg_operation == operation) &&
- (sg_dev == device) &&
- (sg_next_sect == sector_number) )
- {
- req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
- bh = (struct buffer_head *)id;
- bh->b_reqnext = (struct buffer_head *)req->id;
- req->id = id;
- req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
- if ( ++req->nr_segments < MAX_BLK_SEGS )
- sg_next_sect += nr_sectors;
- else
- DISABLE_SCATTERGATHER();
- return 0;
- }
- else if ( RING_PLUGGED )
- {
- return 1;
- }
- else
- {
- sg_operation = operation;
- sg_dev = device;
- sg_next_sect = sector_number + nr_sectors;
- }
- break;
-
- default:
- panic("unknown op %d\n", operation);
- }
-
- /* Fill out a communications ring structure. */
- req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
- req->id = id;
- req->operation = operation;
- req->sector_number = (xen_sector_t)sector_number;
- req->device = device;
- req->nr_segments = 1;
- req->buffer_and_sects[0] = buffer_ma | nr_sectors;
- req_prod++;
-
- return 0;
-}
-
-
-/*
- * do_xlblk_request
- * read a block; request is in a request queue
- */
-void do_xlblk_request(request_queue_t *rq)
-{
- struct request *req;
- struct buffer_head *bh, *next_bh;
- int rw, nsect, full, queued = 0;
-
- DPRINTK("xlblk.c::do_xlblk_request\n");
-
- while ( !rq->plugged && !list_empty(&rq->queue_head))
- {
- if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
- goto out;
-
- DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
- req, req->cmd, req->sector,
- req->current_nr_sectors, req->nr_sectors, req->bh);
-
- rw = req->cmd;
- if ( rw == READA )
- rw = READ;
- if ( unlikely((rw != READ) && (rw != WRITE)) )
- panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
-
- req->errors = 0;
-
- bh = req->bh;
- while ( bh != NULL )
- {
- next_bh = bh->b_reqnext;
- bh->b_reqnext = NULL;
-
- full = hypervisor_request(
- (unsigned long)bh,
- (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
- bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
-
- if ( full )
- {
- bh->b_reqnext = next_bh;
- pending_queues[nr_pending++] = rq;
- if ( unlikely(nr_pending >= MAX_PENDING) )
- BUG();
- goto out;
- }
-
- queued++;
-
- /* Dequeue the buffer head from the request. */
- nsect = bh->b_size >> 9;
- bh = req->bh = next_bh;
-
- if ( bh != NULL )
- {
- /* There's another buffer head to do. Update the request. */
- req->hard_sector += nsect;
- req->hard_nr_sectors -= nsect;
- req->sector = req->hard_sector;
- req->nr_sectors = req->hard_nr_sectors;
- req->current_nr_sectors = bh->b_size >> 9;
- req->buffer = bh->b_data;
- }
- else
- {
- /* That was the last buffer head. Finalise the request. */
- if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
- BUG();
- blkdev_dequeue_request(req);
- end_that_request_last(req);
- }
- }
- }
-
- out:
- if ( queued != 0 ) signal_requests_to_xen();
-}
-
-
-static void kick_pending_request_queues(void)
-{
- /* We kick pending request queues if the ring is reasonably empty. */
- if ( (nr_pending != 0) &&
- ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
- {
- /* Attempt to drain the queue, but bail if the ring becomes full. */
- while ( (nr_pending != 0) && !RING_PLUGGED )
- do_xlblk_request(pending_queues[--nr_pending]);
- }
-}
-
-
-static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- BLK_RING_IDX i;
- unsigned long flags;
- struct buffer_head *bh, *next_bh;
-
- if ( unlikely(state == STATE_CLOSED) )
- return;
-
- spin_lock_irqsave(&io_request_lock, flags);
-
- for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
- {
- blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
- switch ( bret->operation )
- {
- case XEN_BLOCK_READ:
- case XEN_BLOCK_WRITE:
- if ( unlikely(bret->status != 0) )
- DPRINTK("Bad return from blkdev data request: %lx\n",
- bret->status);
- for ( bh = (struct buffer_head *)bret->id;
- bh != NULL;
- bh = next_bh )
- {
- next_bh = bh->b_reqnext;
- bh->b_reqnext = NULL;
- bh->b_end_io(bh, !bret->status);
- }
- break;
-
- default:
- BUG();
- }
- }
-
- resp_cons = i;
-
- kick_pending_request_queues();
-
- spin_unlock_irqrestore(&io_request_lock, flags);
-}
-
-
-static void reset_xlblk_interface(void)
-{
- block_io_op_t op;
-
- nr_pending = 0;
-
- op.cmd = BLOCK_IO_OP_RESET;
- if ( HYPERVISOR_block_io_op(&op) != 0 )
- printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
-
- op.cmd = BLOCK_IO_OP_RING_ADDRESS;
- (void)HYPERVISOR_block_io_op(&op);
-
- set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
- blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
- blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
-
- wmb();
- state = STATE_ACTIVE;
-}
-
-
-int __init xlblk_init(void)
-{
- int error;
-
- reset_xlblk_interface();
-
- xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
- xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD);
-
- error = request_irq(xlblk_response_irq, xlblk_response_int,
- SA_SAMPLE_RANDOM, "blkdev", NULL);
- if ( error )
- {
- printk(KERN_ALERT "Could not allocate receive interrupt\n");
- goto fail;
- }
-
- error = request_irq(xlblk_update_irq, xlblk_update_int,
- 0, "blkdev", NULL);
-
- if ( error )
- {
- printk(KERN_ALERT "Could not allocate block update interrupt\n");
- goto fail;
- }
-
- (void)xlvbd_init();
-
- return 0;
-
- fail:
- return error;
-}
-
-
-static void __exit xlblk_cleanup(void)
-{
- xlvbd_cleanup();
- free_irq(xlblk_response_irq, NULL);
- free_irq(xlblk_update_irq, NULL);
- unbind_virq_from_irq(VIRQ_BLKDEV);
- unbind_virq_from_irq(VIRQ_VBD_UPD);
-}
-
-
-#ifdef MODULE
-module_init(xlblk_init);
-module_exit(xlblk_cleanup);
-#endif
-
-
-void blkdev_suspend(void)
-{
- state = STATE_SUSPENDED;
- wmb();
-
- while ( resp_cons != blk_ring->req_prod )
- {
- barrier();
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
- }
-
- wmb();
- state = STATE_CLOSED;
- wmb();
-
- clear_fixmap(FIX_BLKRING_BASE);
-}
-
-
-void blkdev_resume(void)
-{
- reset_xlblk_interface();
- spin_lock_irq(&io_request_lock);
- kick_pending_request_queues();
- spin_unlock_irq(&io_request_lock);
-}
+++ /dev/null
-/******************************************************************************
- * block.h
- *
- * Shared definitions between all levels of XenoLinux Virtual block devices.
- */
-
-#ifndef __XEN_DRIVERS_BLOCK_H__
-#define __XEN_DRIVERS_BLOCK_H__
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-
-#include <linux/fs.h>
-#include <linux/hdreg.h>
-#include <linux/blkdev.h>
-#include <linux/major.h>
-
-#include <asm/hypervisor-ifs/hypervisor-if.h>
-#include <asm/hypervisor-ifs/vbd.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
-
-#if 0
-#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 0
-#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
-#else
-#define DPRINTK_IOCTL(_f, _a...) ((void)0)
-#endif
-
-/* Private gendisk->flags[] values. */
-#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */
-#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
-
-/*
- * We have one of these per vbd, whether ide, scsi or 'other'.
- * They hang in an array off the gendisk structure. We may end up putting
- * all kinds of interesting stuff here :-)
- */
-typedef struct xl_disk {
- int usage;
-} xl_disk_t;
-
-extern int xen_control_msg(int operration, char *buffer, int size);
-extern int xen_block_open(struct inode *inode, struct file *filep);
-extern int xen_block_release(struct inode *inode, struct file *filep);
-extern int xen_block_ioctl(struct inode *inode, struct file *filep,
- unsigned command, unsigned long argument);
-extern int xen_block_check(kdev_t dev);
-extern int xen_block_revalidate(kdev_t dev);
-extern void do_xlblk_request (request_queue_t *rq);
-
-extern void xlvbd_update_vbds(void);
-
-static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
-{
- struct gendisk *gd = get_gendisk(xldev);
-
- if ( gd == NULL )
- return NULL;
-
- return (xl_disk_t *)gd->real_devices +
- (MINOR(xldev) >> gd->minor_shift);
-}
-
-
-/* Virtual block-device subsystem. */
-extern int xlvbd_init(void);
-extern void xlvbd_cleanup(void);
-
-#endif /* __XEN_DRIVERS_BLOCK_H__ */
--- /dev/null
+O_TARGET := drv.o
+obj-y := block.o vbd.o
+include $(TOPDIR)/Rules.make
--- /dev/null
+/******************************************************************************
+ * block.c
+ *
+ * Xenolinux virtual block-device driver.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ */
+
+#include "block.h"
+#include <linux/blk.h>
+#include <linux/cdrom.h>
+#include <linux/tqueue.h>
+#include <linux/sched.h>
+#include <scsi/scsi.h>
+
+#include <linux/interrupt.h>
+
+typedef unsigned char byte; /* from linux/ide.h */
+
+#define STATE_ACTIVE 0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED 2
+static unsigned int state = STATE_SUSPENDED;
+
+/* Dynamically-mapped IRQs. */
+static int xlblk_response_irq, xlblk_update_irq;
+
+static blk_ring_t *blk_ring;
+static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
+static BLK_RING_IDX req_prod; /* Private request producer. */
+
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
+ (state != STATE_ACTIVE))
+
+
+/*
+ * Request queues with outstanding work, but ring is currently full.
+ * We need no special lock here, as we always access this with the
+ * io_request_lock held. We only need a small maximum list.
+ */
+#define MAX_PENDING 8
+static request_queue_t *pending_queues[MAX_PENDING];
+static int nr_pending;
+
+static kdev_t sg_dev;
+static int sg_operation = -1;
+static unsigned long sg_next_sect;
+#define DISABLE_SCATTERGATHER() (sg_operation = -1)
+
+static inline void signal_requests_to_xen(void)
+{
+ block_io_op_t op;
+
+ DISABLE_SCATTERGATHER();
+ blk_ring->req_prod = req_prod;
+
+ op.cmd = BLOCK_IO_OP_SIGNAL;
+ HYPERVISOR_block_io_op(&op);
+ return;
+}
+
+
+/*
+ * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
+ *
+ * Schedule a task for keventd to run, which will update the VBDs and perform
+ * the corresponding updates to our view of VBD state, so the XenoLinux will
+ * respond to changes / additions / deletions to the set of VBDs automatically.
+ */
+static struct tq_struct update_tq;
+static void update_vbds_task(void *unused)
+{
+ xlvbd_update_vbds();
+}
+static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
+}
+
+
+int xen_block_open(struct inode *inode, struct file *filep)
+{
+ short xldev = inode->i_rdev;
+ struct gendisk *gd = get_gendisk(xldev);
+ xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+ short minor = MINOR(xldev);
+
+ if ( gd->part[minor].nr_sects == 0 )
+ {
+ /*
+ * Device either doesn't exist, or has zero capacity; we use a few
+ * cheesy heuristics to return the relevant error code
+ */
+ if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
+ ((minor & (gd->max_p - 1)) != 0) )
+ {
+ /*
+ * We have a real device, but no such partition, or we just have a
+ * partition number so guess this is the problem.
+ */
+ return -ENXIO; /* no such device or address */
+ }
+ else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
+ {
+ /* This is a removable device => assume that media is missing. */
+ return -ENOMEDIUM; /* media not present (this is a guess) */
+ }
+ else
+ {
+ /* Just go for the general 'no such device' error. */
+ return -ENODEV; /* no such device */
+ }
+ }
+
+ /* Update of usage count is protected by per-device semaphore. */
+ disk->usage++;
+
+ return 0;
+}
+
+
+int xen_block_release(struct inode *inode, struct file *filep)
+{
+ xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+
+ /*
+ * When usage drops to zero it may allow more VBD updates to occur.
+ * Update of usage count is protected by a per-device semaphore.
+ */
+ if ( --disk->usage == 0 )
+ {
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
+ }
+
+ return 0;
+}
+
+
+int xen_block_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument)
+{
+ kdev_t dev = inode->i_rdev;
+ struct hd_geometry *geo = (struct hd_geometry *)argument;
+ struct gendisk *gd;
+ struct hd_struct *part;
+ int i;
+
+ /* NB. No need to check permissions. That is done for us. */
+
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+ command, (long) argument, dev);
+
+ gd = get_gendisk(dev);
+ part = &gd->part[MINOR(dev)];
+
+ switch ( command )
+ {
+ case BLKGETSIZE:
+ DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
+ return put_user(part->nr_sects, (unsigned long *) argument);
+
+ case BLKGETSIZE64:
+ DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
+ (u64)part->nr_sects * 512);
+ return put_user((u64)part->nr_sects * 512, (u64 *) argument);
+
+ case BLKRRPART: /* re-read partition table */
+ DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
+ return xen_block_revalidate(dev);
+
+ case BLKSSZGET:
+ return hardsect_size[MAJOR(dev)][MINOR(dev)];
+
+ case BLKBSZGET: /* get block size */
+ DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
+ break;
+
+ case BLKBSZSET: /* set block size */
+ DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
+ break;
+
+ case BLKRASET: /* set read-ahead */
+ DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
+ break;
+
+ case BLKRAGET: /* get read-ahead */
+ DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET);
+ break;
+
+ case HDIO_GETGEO:
+ /* note: these values are complete garbage */
+ DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
+ if (!argument) return -EINVAL;
+ if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+ if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+ if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+ if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
+ return 0;
+
+ case HDIO_GETGEO_BIG:
+ /* note: these values are complete garbage */
+ DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
+ if (!argument) return -EINVAL;
+ if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+ if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
+ if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
+ if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
+ return 0;
+
+ case CDROMMULTISESSION:
+ DPRINTK("FIXME: support multisession CDs later\n");
+ for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
+ if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
+ return 0;
+
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
+ return -ENOSYS;
+
+ default:
+ printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+/* check media change: should probably do something here in some cases :-) */
+int xen_block_check(kdev_t dev)
+{
+ DPRINTK("xen_block_check\n");
+ return 0;
+}
+
+int xen_block_revalidate(kdev_t dev)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ xl_disk_t *disk;
+ unsigned long capacity;
+ int i, rc = 0;
+
+ if ( (bd = bdget(dev)) == NULL )
+ return -EINVAL;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(dev)) == NULL) ||
+ ((disk = xldev_to_xldisk(dev)) == NULL) ||
+ ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if ( disk->usage > 1 )
+ {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* Only reread partition table if VBDs aren't mapped to partitions. */
+ if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
+ {
+ for ( i = gd->max_p - 1; i >= 0; i-- )
+ {
+ invalidate_device(dev+i, 1);
+ gd->part[MINOR(dev+i)].start_sect = 0;
+ gd->part[MINOR(dev+i)].nr_sects = 0;
+ gd->sizes[MINOR(dev+i)] = 0;
+ }
+
+ grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+
+/*
+ * hypervisor_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
+ * buffer: buffer to read/write into. this should be a
+ * virtual address in the guest os.
+ */
+static int hypervisor_request(unsigned long id,
+ int operation,
+ char * buffer,
+ unsigned long sector_number,
+ unsigned short nr_sectors,
+ kdev_t device)
+{
+ unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
+ struct gendisk *gd;
+ blk_ring_req_entry_t *req;
+ struct buffer_head *bh;
+
+ if ( unlikely(nr_sectors >= (1<<9)) )
+ BUG();
+ if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
+ BUG();
+
+ if ( unlikely(state == STATE_CLOSED) )
+ return 1;
+
+ switch ( operation )
+ {
+
+ case XEN_BLOCK_READ:
+ case XEN_BLOCK_WRITE:
+ gd = get_gendisk(device);
+
+ /*
+ * Update the sector_number we'll pass down as appropriate; note that
+ * we could sanity check that resulting sector will be in this
+ * partition, but this will happen in xen anyhow.
+ */
+ sector_number += gd->part[MINOR(device)].start_sect;
+
+ /*
+ * If this unit doesn't consist of virtual (i.e., Xen-specified)
+ * partitions then we clear the partn bits from the device number.
+ */
+ if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
+ GENHD_FL_VIRT_PARTNS) )
+ device &= ~(gd->max_p - 1);
+
+ if ( (sg_operation == operation) &&
+ (sg_dev == device) &&
+ (sg_next_sect == sector_number) )
+ {
+ req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
+ bh = (struct buffer_head *)id;
+ bh->b_reqnext = (struct buffer_head *)req->id;
+ req->id = id;
+ req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
+ if ( ++req->nr_segments < MAX_BLK_SEGS )
+ sg_next_sect += nr_sectors;
+ else
+ DISABLE_SCATTERGATHER();
+ return 0;
+ }
+ else if ( RING_PLUGGED )
+ {
+ return 1;
+ }
+ else
+ {
+ sg_operation = operation;
+ sg_dev = device;
+ sg_next_sect = sector_number + nr_sectors;
+ }
+ break;
+
+ default:
+ panic("unknown op %d\n", operation);
+ }
+
+ /* Fill out a communications ring structure. */
+ req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
+ req->id = id;
+ req->operation = operation;
+ req->sector_number = (xen_sector_t)sector_number;
+ req->device = device;
+ req->nr_segments = 1;
+ req->buffer_and_sects[0] = buffer_ma | nr_sectors;
+ req_prod++;
+
+ return 0;
+}
+
+
+/*
+ * do_xlblk_request
+ * read a block; request is in a request queue
+ */
+void do_xlblk_request(request_queue_t *rq)
+{
+ struct request *req;
+ struct buffer_head *bh, *next_bh;
+ int rw, nsect, full, queued = 0;
+
+ DPRINTK("xlblk.c::do_xlblk_request\n");
+
+ while ( !rq->plugged && !list_empty(&rq->queue_head))
+ {
+ if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
+ goto out;
+
+ DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
+ req, req->cmd, req->sector,
+ req->current_nr_sectors, req->nr_sectors, req->bh);
+
+ rw = req->cmd;
+ if ( rw == READA )
+ rw = READ;
+ if ( unlikely((rw != READ) && (rw != WRITE)) )
+ panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
+
+ req->errors = 0;
+
+ bh = req->bh;
+ while ( bh != NULL )
+ {
+ next_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+
+ full = hypervisor_request(
+ (unsigned long)bh,
+ (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
+ bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
+
+ if ( full )
+ {
+ bh->b_reqnext = next_bh;
+ pending_queues[nr_pending++] = rq;
+ if ( unlikely(nr_pending >= MAX_PENDING) )
+ BUG();
+ goto out;
+ }
+
+ queued++;
+
+ /* Dequeue the buffer head from the request. */
+ nsect = bh->b_size >> 9;
+ bh = req->bh = next_bh;
+
+ if ( bh != NULL )
+ {
+ /* There's another buffer head to do. Update the request. */
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+ req->current_nr_sectors = bh->b_size >> 9;
+ req->buffer = bh->b_data;
+ }
+ else
+ {
+ /* That was the last buffer head. Finalise the request. */
+ if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
+ BUG();
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+ }
+ }
+ }
+
+ out:
+ if ( queued != 0 ) signal_requests_to_xen();
+}
+
+
+static void kick_pending_request_queues(void)
+{
+ /* We kick pending request queues if the ring is reasonably empty. */
+ if ( (nr_pending != 0) &&
+ ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
+ {
+ /* Attempt to drain the queue, but bail if the ring becomes full. */
+ while ( (nr_pending != 0) && !RING_PLUGGED )
+ do_xlblk_request(pending_queues[--nr_pending]);
+ }
+}
+
+
+static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ BLK_RING_IDX i;
+ unsigned long flags;
+ struct buffer_head *bh, *next_bh;
+
+ if ( unlikely(state == STATE_CLOSED) )
+ return;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
+ {
+ blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
+ switch ( bret->operation )
+ {
+ case XEN_BLOCK_READ:
+ case XEN_BLOCK_WRITE:
+ if ( unlikely(bret->status != 0) )
+ DPRINTK("Bad return from blkdev data request: %lx\n",
+ bret->status);
+ for ( bh = (struct buffer_head *)bret->id;
+ bh != NULL;
+ bh = next_bh )
+ {
+ next_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, !bret->status);
+ }
+ break;
+
+ default:
+ BUG();
+ }
+ }
+
+ resp_cons = i;
+
+ kick_pending_request_queues();
+
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+static void reset_xlblk_interface(void)
+{
+ block_io_op_t op;
+
+ nr_pending = 0;
+
+ op.cmd = BLOCK_IO_OP_RESET;
+ if ( HYPERVISOR_block_io_op(&op) != 0 )
+ printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
+
+ op.cmd = BLOCK_IO_OP_RING_ADDRESS;
+ (void)HYPERVISOR_block_io_op(&op);
+
+ set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
+ blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
+ blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+
+ wmb();
+ state = STATE_ACTIVE;
+}
+
+
+int __init xlblk_init(void)
+{
+ int error;
+
+ reset_xlblk_interface();
+
+ xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
+ xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD);
+
+ error = request_irq(xlblk_response_irq, xlblk_response_int,
+ SA_SAMPLE_RANDOM, "blkdev", NULL);
+ if ( error )
+ {
+ printk(KERN_ALERT "Could not allocate receive interrupt\n");
+ goto fail;
+ }
+
+ error = request_irq(xlblk_update_irq, xlblk_update_int,
+ 0, "blkdev", NULL);
+
+ if ( error )
+ {
+ printk(KERN_ALERT "Could not allocate block update interrupt\n");
+ goto fail;
+ }
+
+ (void)xlvbd_init();
+
+ return 0;
+
+ fail:
+ return error;
+}
+
+
+static void __exit xlblk_cleanup(void)
+{
+ xlvbd_cleanup();
+ free_irq(xlblk_response_irq, NULL);
+ free_irq(xlblk_update_irq, NULL);
+ unbind_virq_from_irq(VIRQ_BLKDEV);
+ unbind_virq_from_irq(VIRQ_VBD_UPD);
+}
+
+
+#ifdef MODULE
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+#endif
+
+
+void blkdev_suspend(void)
+{
+ state = STATE_SUSPENDED;
+ wmb();
+
+ while ( resp_cons != blk_ring->req_prod )
+ {
+ barrier();
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+
+ wmb();
+ state = STATE_CLOSED;
+ wmb();
+
+ clear_fixmap(FIX_BLKRING_BASE);
+}
+
+
+void blkdev_resume(void)
+{
+ reset_xlblk_interface();
+ spin_lock_irq(&io_request_lock);
+ kick_pending_request_queues();
+ spin_unlock_irq(&io_request_lock);
+}
--- /dev/null
+/******************************************************************************
+ * block.h
+ *
+ * Shared definitions between all levels of XenoLinux Virtual block devices.
+ */
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/hypervisor-ifs/vbd.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+/* Private gendisk->flags[] values. */
+#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */
+#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.
+ * They hang in an array off the gendisk structure. We may end up putting
+ * all kinds of interesting stuff here :-)
+ */
+typedef struct xl_disk {
+ int usage;
+} xl_disk_t;
+
+extern int xen_control_msg(int operration, char *buffer, int size);
+extern int xen_block_open(struct inode *inode, struct file *filep);
+extern int xen_block_release(struct inode *inode, struct file *filep);
+extern int xen_block_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument);
+extern int xen_block_check(kdev_t dev);
+extern int xen_block_revalidate(kdev_t dev);
+extern void do_xlblk_request (request_queue_t *rq);
+
+extern void xlvbd_update_vbds(void);
+
+static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
+{
+ struct gendisk *gd = get_gendisk(xldev);
+
+ if ( gd == NULL )
+ return NULL;
+
+ return (xl_disk_t *)gd->real_devices +
+ (MINOR(xldev) >> gd->minor_shift);
+}
+
+
+/* Virtual block-device subsystem. */
+extern int xlvbd_init(void);
+extern void xlvbd_cleanup(void);
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
--- /dev/null
+/******************************************************************************
+ * vbd.c
+ *
+ * Xenolinux virtual block-device driver (xvd).
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ */
+
+#include "block.h"
+#include <linux/blk.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.
+ * potentially combinations of the two) in the naming scheme and in a few
+ * other places (like default readahead, etc).
+ */
+#define XLIDE_MAJOR_NAME "hd"
+#define XLSCSI_MAJOR_NAME "sd"
+#define XLVBD_MAJOR_NAME "xvd"
+
+#define XLIDE_DEVS_PER_MAJOR 2
+#define XLSCSI_DEVS_PER_MAJOR 16
+#define XLVBD_DEVS_PER_MAJOR 16
+
+#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */
+#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */
+
+#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */
+#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */
+
+#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */
+#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
+
+/* The below are for the generic drivers/block/ll_rw_block.c code. */
+static int xlide_blksize_size[256];
+static int xlide_hardsect_size[256];
+static int xlide_max_sectors[256];
+static int xlscsi_blksize_size[256];
+static int xlscsi_hardsect_size[256];
+static int xlscsi_max_sectors[256];
+static int xlvbd_blksize_size[256];
+static int xlvbd_hardsect_size[256];
+static int xlvbd_max_sectors[256];
+
+/* Information from Xen about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static xen_disk_t *vbd_info;
+
+static struct block_device_operations xlvbd_block_fops =
+{
+ open: xen_block_open,
+ release: xen_block_release,
+ ioctl: xen_block_ioctl,
+ check_media_change: xen_block_check,
+ revalidate: xen_block_revalidate,
+};
+
+static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
+{
+ int error;
+ block_io_op_t op;
+
+ /* Probe for disk information. */
+ memset(&op, 0, sizeof(op));
+ op.cmd = BLOCK_IO_OP_VBD_PROBE;
+ op.u.probe_params.domain = 0;
+ op.u.probe_params.xdi.max = MAX_VBDS;
+ op.u.probe_params.xdi.disks = disk_info;
+ op.u.probe_params.xdi.count = 0;
+
+ if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
+ {
+ printk(KERN_ALERT "Could not probe disks (%d)\n", error);
+ return -1;
+ }
+
+ return op.u.probe_params.xdi.count;
+}
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @disk: a xen_disk_t describing the VBD
+ *
+ * Takes a xen_disk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur. Also, devices that are in use should not have
+ * their details updated. This is the caller's responsibility.
+ */
+static int xlvbd_init_device(xen_disk_t *xd)
+{
+ int device = xd->device;
+ int major = MAJOR(device);
+ int minor = MINOR(device);
+ int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */
+ int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */
+ char *major_name;
+ struct gendisk *gd;
+ struct block_device *bd;
+ xl_disk_t *disk;
+ int i, rc = 0, max_part, partno;
+ unsigned long capacity;
+
+ unsigned char buf[64];
+
+ if ( (bd = bdget(device)) == NULL )
+ return -1;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
+ {
+ printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
+ rc = -1;
+ goto out;
+ }
+
+ if ( is_ide ) {
+
+ major_name = XLIDE_MAJOR_NAME;
+ max_part = XLIDE_MAX_PART;
+
+ } else if ( is_scsi ) {
+
+ major_name = XLSCSI_MAJOR_NAME;
+ max_part = XLSCSI_MAX_PART;
+
+ } else if (XD_VIRTUAL(xd->info)) {
+
+ major_name = XLVBD_MAJOR_NAME;
+ max_part = XLVBD_MAX_PART;
+
+ } else {
+
+ /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
+ printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n",
+ major, minor);
+ is_scsi = 1;
+ major_name = "cciss";
+ max_part = XLSCSI_MAX_PART;
+
+ }
+
+ partno = minor & (max_part - 1);
+
+ if ( (gd = get_gendisk(device)) == NULL )
+ {
+ rc = register_blkdev(major, major_name, &xlvbd_block_fops);
+ if ( rc < 0 )
+ {
+ printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
+ goto out;
+ }
+
+ if ( is_ide )
+ {
+ blksize_size[major] = xlide_blksize_size;
+ hardsect_size[major] = xlide_hardsect_size;
+ max_sectors[major] = xlide_max_sectors;
+ read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
+ }
+ else if ( is_scsi )
+ {
+ blksize_size[major] = xlscsi_blksize_size;
+ hardsect_size[major] = xlscsi_hardsect_size;
+ max_sectors[major] = xlscsi_max_sectors;
+ read_ahead[major] = 0; /* XXX 8; -- guessing */
+ }
+ else
+ {
+ blksize_size[major] = xlvbd_blksize_size;
+ hardsect_size[major] = xlvbd_hardsect_size;
+ max_sectors[major] = xlvbd_max_sectors;
+ read_ahead[major] = 8;
+ }
+
+ blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
+
+ /*
+ * Turn off barking 'headactive' mode. We dequeue buffer heads as
+ * soon as we pass them down to Xen.
+ */
+ blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
+
+ /* Construct an appropriate gendisk structure. */
+ gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
+ gd->major = major;
+ gd->major_name = major_name;
+
+ gd->max_p = max_part;
+ if ( is_ide )
+ {
+ gd->minor_shift = XLIDE_PARTN_SHIFT;
+ gd->nr_real = XLIDE_DEVS_PER_MAJOR;
+ }
+ else if ( is_scsi )
+ {
+ gd->minor_shift = XLSCSI_PARTN_SHIFT;
+ gd->nr_real = XLSCSI_DEVS_PER_MAJOR;
+ }
+ else
+ {
+ gd->minor_shift = XLVBD_PARTN_SHIFT;
+ gd->nr_real = XLVBD_DEVS_PER_MAJOR;
+ }
+
+ /*
+ ** The sizes[] and part[] arrays hold the sizes and other
+ ** information about every partition with this 'major' (i.e.
+ ** every disk sharing the 8 bit prefix * max partns per disk)
+ */
+ gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
+ gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct),
+ GFP_KERNEL);
+ memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
+ memset(gd->part, 0, max_part * gd->nr_real
+ * sizeof(struct hd_struct));
+
+
+ gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t),
+ GFP_KERNEL);
+ memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
+
+ gd->next = NULL;
+ gd->fops = &xlvbd_block_fops;
+
+ gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr),
+ GFP_KERNEL);
+ gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
+
+ memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
+ memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags));
+
+ add_gendisk(gd);
+
+ blk_size[major] = gd->sizes;
+ }
+
+ if ( XD_READONLY(xd->info) )
+ set_device_ro(device, 1);
+
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
+
+ /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
+ capacity = (unsigned long)xd->capacity;
+
+ if ( partno != 0 )
+ {
+ /*
+ * If this was previously set up as a real disc we will have set
+ * up partition-table information. Virtual partitions override
+ * 'real' partitions, and the two cannot coexist on a device.
+ */
+ if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+ (gd->sizes[minor & ~(max_part-1)] != 0) )
+ {
+ /*
+ * Any non-zero sub-partition entries must be cleaned out before
+ * installing 'virtual' partition entries. The two types cannot
+ * coexist, and virtual partitions are favoured.
+ */
+ kdev_t dev = device & ~(max_part-1);
+ for ( i = max_part - 1; i > 0; i-- )
+ {
+ invalidate_device(dev+i, 1);
+ gd->part[MINOR(dev+i)].start_sect = 0;
+ gd->part[MINOR(dev+i)].nr_sects = 0;
+ gd->sizes[MINOR(dev+i)] = 0;
+ }
+ printk(KERN_ALERT
+ "Virtual partitions found for /dev/%s - ignoring any "
+ "real partition information we may have found.\n",
+ disk_name(gd, MINOR(device), buf));
+ }
+
+ /* Need to skankily setup 'partition' information */
+ gd->part[minor].start_sect = 0;
+ gd->part[minor].nr_sects = capacity;
+ gd->sizes[minor] = capacity;
+
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+ }
+ else
+ {
+ gd->part[minor].nr_sects = capacity;
+ gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
+
+ /* Some final fix-ups depending on the device type */
+ switch ( XD_TYPE(xd->info) )
+ {
+ case XD_TYPE_CDROM:
+ case XD_TYPE_FLOPPY:
+ case XD_TYPE_TAPE:
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
+ printk(KERN_ALERT
+ "Skipping partition check on %s /dev/%s\n",
+ XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" :
+ (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" :
+ "floppy"), disk_name(gd, MINOR(device), buf));
+ break;
+
+ case XD_TYPE_DISK:
+ /* Only check partitions on real discs (not virtual!). */
+ if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+ {
+ printk(KERN_ALERT
+ "Skipping partition check on virtual /dev/%s\n",
+ disk_name(gd, MINOR(device), buf));
+ break;
+ }
+ register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
+ break;
+
+ default:
+ printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
+ XD_TYPE(xd->info));
+ break;
+ }
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device: numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+ int i, rc = 0, minor = MINOR(device);
+ struct gendisk *gd;
+ struct block_device *bd;
+ xl_disk_t *disk = NULL;
+
+ if ( (bd = bdget(device)) == NULL )
+ return -1;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(device)) == NULL) ||
+ ((disk = xldev_to_xldisk(device)) == NULL) )
+ BUG();
+
+ if ( disk->usage != 0 )
+ {
+ printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+ rc = -1;
+ goto out;
+ }
+
+ if ( (minor & (gd->max_p-1)) != 0 )
+ {
+ /* 1: The VBD is mapped to a partition rather than a whole unit. */
+ invalidate_device(device, 1);
+ gd->part[minor].start_sect = 0;
+ gd->part[minor].nr_sects = 0;
+ gd->sizes[minor] = 0;
+
+ /* Clear the consists-of-virtual-partitions flag if possible. */
+ gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+ for ( i = 1; i < gd->max_p; i++ )
+ if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+ /*
+ * If all virtual partitions are now gone, and a 'whole unit' VBD is
+ * present, then we can try to grok the unit's real partition table.
+ */
+ if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+ (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+ !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+ {
+ register_disk(gd,
+ device&~(gd->max_p-1),
+ gd->max_p,
+ &xlvbd_block_fops,
+ gd->part[minor&~(gd->max_p-1)].nr_sects);
+ }
+ }
+ else
+ {
+ /*
+ * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+ * NB. The partition entries are only cleared if there are no VBDs
+ * mapped to individual partitions on this unit.
+ */
+ i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+ if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+ i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+ while ( i >= 0 )
+ {
+ invalidate_device(device+i, 1);
+ gd->part[minor+i].start_sect = 0;
+ gd->part[minor+i].nr_sects = 0;
+ gd->sizes[minor+i] = 0;
+ i--;
+ }
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
+ * state. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+void xlvbd_update_vbds(void)
+{
+ int i, j, k, old_nr, new_nr;
+ xen_disk_t *old_info, *new_info, *merged_info;
+
+ old_info = vbd_info;
+ old_nr = nr_vbds;
+
+ new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+ if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
+ {
+ kfree(new_info);
+ return;
+ }
+
+ /*
+ * Final list maximum size is old list + new list. This occurs only when
+ * old list and new list do not overlap at all, and we cannot yet destroy
+ * VBDs in the old list because the usage counts are busy.
+ */
+ merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
+
+ /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+ i = j = k = 0;
+
+ while ( (i < old_nr) && (j < new_nr) )
+ {
+ if ( old_info[i].device < new_info[j].device )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ i++;
+ }
+ else if ( old_info[i].device > new_info[j].device )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ j++;
+ }
+ else
+ {
+ if ( ((old_info[i].capacity == new_info[j].capacity) &&
+ (old_info[i].info == new_info[j].info)) ||
+ (xlvbd_remove_device(old_info[i].device) != 0) )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ else if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ i++; j++;
+ }
+ }
+
+ for ( ; i < old_nr; i++ )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ }
+
+ for ( ; j < new_nr; j++ )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ }
+
+ vbd_info = merged_info;
+ nr_vbds = k;
+
+ kfree(old_info);
+ kfree(new_info);
+}
+
+
+/*
+ * Set up all the linux device goop for the virtual block devices (vbd's) that
+ * xen tells us about. Note that although from xen's pov VBDs are addressed
+ * simply an opaque 16-bit device number, the domain creation tools
+ * conventionally allocate these numbers to correspond to those used by 'real'
+ * linux -- this is just for convenience as it means e.g. that the same
+ * /etc/fstab can be used when booting with or without xen.
+ */
+int __init xlvbd_init(void)
+{
+ int i;
+
+ /*
+ * If compiled as a module, we don't support unloading yet. We therefore
+ * permanently increment the reference count to disallow it.
+ */
+ SET_MODULE_OWNER(&xlvbd_block_fops);
+ MOD_INC_USE_COUNT;
+
+ /* Initialize the global arrays. */
+ for ( i = 0; i < 256; i++ )
+ {
+ /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
+ xlide_blksize_size[i] = 1024;
+ xlide_hardsect_size[i] = 512;
+ xlide_max_sectors[i] = 128; /* 'hwif->rqsize' if we knew it */
+
+ /* from the generic scsi disk code (drivers/scsi/sd.c) */
+ xlscsi_blksize_size[i] = 1024; /* XXX 512; */
+ xlscsi_hardsect_size[i] = 512;
+ xlscsi_max_sectors[i] = 128*8; /* XXX 128; */
+
+ /* we don't really know what to set these too since it depends */
+ xlvbd_blksize_size[i] = 512;
+ xlvbd_hardsect_size[i] = 512;
+ xlvbd_max_sectors[i] = 128;
+ }
+
+ vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+ nr_vbds = xlvbd_get_vbd_info(vbd_info);
+
+ if ( nr_vbds < 0 )
+ {
+ kfree(vbd_info);
+ vbd_info = NULL;
+ nr_vbds = 0;
+ }
+ else
+ {
+ for ( i = 0; i < nr_vbds; i++ )
+ xlvbd_init_device(&vbd_info[i]);
+ }
+
+ return 0;
+}
+
+
+#ifdef MODULE
+module_init(xlvbd_init);
+#endif
+++ /dev/null
-/******************************************************************************
- * vbd.c
- *
- * Xenolinux virtual block-device driver (xvd).
- *
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- */
-
-#include "block.h"
-#include <linux/blk.h>
-
-/*
- * For convenience we distinguish between ide, scsi and 'other' (i.e.
- * potentially combinations of the two) in the naming scheme and in a few
- * other places (like default readahead, etc).
- */
-#define XLIDE_MAJOR_NAME "hd"
-#define XLSCSI_MAJOR_NAME "sd"
-#define XLVBD_MAJOR_NAME "xvd"
-
-#define XLIDE_DEVS_PER_MAJOR 2
-#define XLSCSI_DEVS_PER_MAJOR 16
-#define XLVBD_DEVS_PER_MAJOR 16
-
-#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */
-#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */
-
-#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */
-#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */
-
-#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */
-#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
-
-/* The below are for the generic drivers/block/ll_rw_block.c code. */
-static int xlide_blksize_size[256];
-static int xlide_hardsect_size[256];
-static int xlide_max_sectors[256];
-static int xlscsi_blksize_size[256];
-static int xlscsi_hardsect_size[256];
-static int xlscsi_max_sectors[256];
-static int xlvbd_blksize_size[256];
-static int xlvbd_hardsect_size[256];
-static int xlvbd_max_sectors[256];
-
-/* Information from Xen about our VBDs. */
-#define MAX_VBDS 64
-static int nr_vbds;
-static xen_disk_t *vbd_info;
-
-static struct block_device_operations xlvbd_block_fops =
-{
- open: xen_block_open,
- release: xen_block_release,
- ioctl: xen_block_ioctl,
- check_media_change: xen_block_check,
- revalidate: xen_block_revalidate,
-};
-
-static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
-{
- int error;
- block_io_op_t op;
-
- /* Probe for disk information. */
- memset(&op, 0, sizeof(op));
- op.cmd = BLOCK_IO_OP_VBD_PROBE;
- op.u.probe_params.domain = 0;
- op.u.probe_params.xdi.max = MAX_VBDS;
- op.u.probe_params.xdi.disks = disk_info;
- op.u.probe_params.xdi.count = 0;
-
- if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
- {
- printk(KERN_ALERT "Could not probe disks (%d)\n", error);
- return -1;
- }
-
- return op.u.probe_params.xdi.count;
-}
-
-/*
- * xlvbd_init_device - initialise a VBD device
- * @disk: a xen_disk_t describing the VBD
- *
- * Takes a xen_disk_t * that describes a VBD the domain has access to.
- * Performs appropriate initialisation and registration of the device.
- *
- * Care needs to be taken when making re-entrant calls to ensure that
- * corruption does not occur. Also, devices that are in use should not have
- * their details updated. This is the caller's responsibility.
- */
-static int xlvbd_init_device(xen_disk_t *xd)
-{
- int device = xd->device;
- int major = MAJOR(device);
- int minor = MINOR(device);
- int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */
- int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */
- char *major_name;
- struct gendisk *gd;
- struct block_device *bd;
- xl_disk_t *disk;
- int i, rc = 0, max_part, partno;
- unsigned long capacity;
-
- unsigned char buf[64];
-
- if ( (bd = bdget(device)) == NULL )
- return -1;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
- {
- printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
- rc = -1;
- goto out;
- }
-
- if ( is_ide ) {
-
- major_name = XLIDE_MAJOR_NAME;
- max_part = XLIDE_MAX_PART;
-
- } else if ( is_scsi ) {
-
- major_name = XLSCSI_MAJOR_NAME;
- max_part = XLSCSI_MAX_PART;
-
- } else if (XD_VIRTUAL(xd->info)) {
-
- major_name = XLVBD_MAJOR_NAME;
- max_part = XLVBD_MAX_PART;
-
- } else {
-
- /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
- printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n",
- major, minor);
- is_scsi = 1;
- major_name = "cciss";
- max_part = XLSCSI_MAX_PART;
-
- }
-
- partno = minor & (max_part - 1);
-
- if ( (gd = get_gendisk(device)) == NULL )
- {
- rc = register_blkdev(major, major_name, &xlvbd_block_fops);
- if ( rc < 0 )
- {
- printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
- goto out;
- }
-
- if ( is_ide )
- {
- blksize_size[major] = xlide_blksize_size;
- hardsect_size[major] = xlide_hardsect_size;
- max_sectors[major] = xlide_max_sectors;
- read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
- }
- else if ( is_scsi )
- {
- blksize_size[major] = xlscsi_blksize_size;
- hardsect_size[major] = xlscsi_hardsect_size;
- max_sectors[major] = xlscsi_max_sectors;
- read_ahead[major] = 0; /* XXX 8; -- guessing */
- }
- else
- {
- blksize_size[major] = xlvbd_blksize_size;
- hardsect_size[major] = xlvbd_hardsect_size;
- max_sectors[major] = xlvbd_max_sectors;
- read_ahead[major] = 8;
- }
-
- blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
-
- /*
- * Turn off barking 'headactive' mode. We dequeue buffer heads as
- * soon as we pass them down to Xen.
- */
- blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
-
- /* Construct an appropriate gendisk structure. */
- gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
- gd->major = major;
- gd->major_name = major_name;
-
- gd->max_p = max_part;
- if ( is_ide )
- {
- gd->minor_shift = XLIDE_PARTN_SHIFT;
- gd->nr_real = XLIDE_DEVS_PER_MAJOR;
- }
- else if ( is_scsi )
- {
- gd->minor_shift = XLSCSI_PARTN_SHIFT;
- gd->nr_real = XLSCSI_DEVS_PER_MAJOR;
- }
- else
- {
- gd->minor_shift = XLVBD_PARTN_SHIFT;
- gd->nr_real = XLVBD_DEVS_PER_MAJOR;
- }
-
- /*
- ** The sizes[] and part[] arrays hold the sizes and other
- ** information about every partition with this 'major' (i.e.
- ** every disk sharing the 8 bit prefix * max partns per disk)
- */
- gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
- gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct),
- GFP_KERNEL);
- memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
- memset(gd->part, 0, max_part * gd->nr_real
- * sizeof(struct hd_struct));
-
-
- gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t),
- GFP_KERNEL);
- memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
-
- gd->next = NULL;
- gd->fops = &xlvbd_block_fops;
-
- gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr),
- GFP_KERNEL);
- gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
-
- memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
- memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags));
-
- add_gendisk(gd);
-
- blk_size[major] = gd->sizes;
- }
-
- if ( XD_READONLY(xd->info) )
- set_device_ro(device, 1);
-
- gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
-
- /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
- capacity = (unsigned long)xd->capacity;
-
- if ( partno != 0 )
- {
- /*
- * If this was previously set up as a real disc we will have set
- * up partition-table information. Virtual partitions override
- * 'real' partitions, and the two cannot coexist on a device.
- */
- if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
- (gd->sizes[minor & ~(max_part-1)] != 0) )
- {
- /*
- * Any non-zero sub-partition entries must be cleaned out before
- * installing 'virtual' partition entries. The two types cannot
- * coexist, and virtual partitions are favoured.
- */
- kdev_t dev = device & ~(max_part-1);
- for ( i = max_part - 1; i > 0; i-- )
- {
- invalidate_device(dev+i, 1);
- gd->part[MINOR(dev+i)].start_sect = 0;
- gd->part[MINOR(dev+i)].nr_sects = 0;
- gd->sizes[MINOR(dev+i)] = 0;
- }
- printk(KERN_ALERT
- "Virtual partitions found for /dev/%s - ignoring any "
- "real partition information we may have found.\n",
- disk_name(gd, MINOR(device), buf));
- }
-
- /* Need to skankily setup 'partition' information */
- gd->part[minor].start_sect = 0;
- gd->part[minor].nr_sects = capacity;
- gd->sizes[minor] = capacity;
-
- gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
- }
- else
- {
- gd->part[minor].nr_sects = capacity;
- gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
-
- /* Some final fix-ups depending on the device type */
- switch ( XD_TYPE(xd->info) )
- {
- case XD_TYPE_CDROM:
- case XD_TYPE_FLOPPY:
- case XD_TYPE_TAPE:
- gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
- printk(KERN_ALERT
- "Skipping partition check on %s /dev/%s\n",
- XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" :
- (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" :
- "floppy"), disk_name(gd, MINOR(device), buf));
- break;
-
- case XD_TYPE_DISK:
- /* Only check partitions on real discs (not virtual!). */
- if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
- {
- printk(KERN_ALERT
- "Skipping partition check on virtual /dev/%s\n",
- disk_name(gd, MINOR(device), buf));
- break;
- }
- register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
- break;
-
- default:
- printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
- XD_TYPE(xd->info));
- break;
- }
- }
-
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
-}
-
-
-/*
- * xlvbd_remove_device - remove a device node if possible
- * @device: numeric device ID
- *
- * Updates the gendisk structure and invalidates devices.
- *
- * This is OK for now but in future, should perhaps consider where this should
- * deallocate gendisks / unregister devices.
- */
-static int xlvbd_remove_device(int device)
-{
- int i, rc = 0, minor = MINOR(device);
- struct gendisk *gd;
- struct block_device *bd;
- xl_disk_t *disk = NULL;
-
- if ( (bd = bdget(device)) == NULL )
- return -1;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((gd = get_gendisk(device)) == NULL) ||
- ((disk = xldev_to_xldisk(device)) == NULL) )
- BUG();
-
- if ( disk->usage != 0 )
- {
- printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
- rc = -1;
- goto out;
- }
-
- if ( (minor & (gd->max_p-1)) != 0 )
- {
- /* 1: The VBD is mapped to a partition rather than a whole unit. */
- invalidate_device(device, 1);
- gd->part[minor].start_sect = 0;
- gd->part[minor].nr_sects = 0;
- gd->sizes[minor] = 0;
-
- /* Clear the consists-of-virtual-partitions flag if possible. */
- gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
- for ( i = 1; i < gd->max_p; i++ )
- if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
- gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
-
- /*
- * If all virtual partitions are now gone, and a 'whole unit' VBD is
- * present, then we can try to grok the unit's real partition table.
- */
- if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
- (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
- !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
- {
- register_disk(gd,
- device&~(gd->max_p-1),
- gd->max_p,
- &xlvbd_block_fops,
- gd->part[minor&~(gd->max_p-1)].nr_sects);
- }
- }
- else
- {
- /*
- * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
- * NB. The partition entries are only cleared if there are no VBDs
- * mapped to individual partitions on this unit.
- */
- i = gd->max_p - 1; /* Default: clear subpartitions as well. */
- if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
- i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
- while ( i >= 0 )
- {
- invalidate_device(device+i, 1);
- gd->part[minor+i].start_sect = 0;
- gd->part[minor+i].nr_sects = 0;
- gd->sizes[minor+i] = 0;
- i--;
- }
- }
-
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
- * state. The VBDs need to be updated in this way when the domain is
- * initialised and also each time we receive an XLBLK_UPDATE event.
- */
-void xlvbd_update_vbds(void)
-{
- int i, j, k, old_nr, new_nr;
- xen_disk_t *old_info, *new_info, *merged_info;
-
- old_info = vbd_info;
- old_nr = nr_vbds;
-
- new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
- if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
- {
- kfree(new_info);
- return;
- }
-
- /*
- * Final list maximum size is old list + new list. This occurs only when
- * old list and new list do not overlap at all, and we cannot yet destroy
- * VBDs in the old list because the usage counts are busy.
- */
- merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
-
- /* @i tracks old list; @j tracks new list; @k tracks merged list. */
- i = j = k = 0;
-
- while ( (i < old_nr) && (j < new_nr) )
- {
- if ( old_info[i].device < new_info[j].device )
- {
- if ( xlvbd_remove_device(old_info[i].device) != 0 )
- memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
- i++;
- }
- else if ( old_info[i].device > new_info[j].device )
- {
- if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
- j++;
- }
- else
- {
- if ( ((old_info[i].capacity == new_info[j].capacity) &&
- (old_info[i].info == new_info[j].info)) ||
- (xlvbd_remove_device(old_info[i].device) != 0) )
- memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
- else if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
- i++; j++;
- }
- }
-
- for ( ; i < old_nr; i++ )
- {
- if ( xlvbd_remove_device(old_info[i].device) != 0 )
- memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
- }
-
- for ( ; j < new_nr; j++ )
- {
- if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
- }
-
- vbd_info = merged_info;
- nr_vbds = k;
-
- kfree(old_info);
- kfree(new_info);
-}
-
-
-/*
- * Set up all the linux device goop for the virtual block devices (vbd's) that
- * xen tells us about. Note that although from xen's pov VBDs are addressed
- * simply an opaque 16-bit device number, the domain creation tools
- * conventionally allocate these numbers to correspond to those used by 'real'
- * linux -- this is just for convenience as it means e.g. that the same
- * /etc/fstab can be used when booting with or without xen.
- */
-int __init xlvbd_init(void)
-{
- int i;
-
- /*
- * If compiled as a module, we don't support unloading yet. We therefore
- * permanently increment the reference count to disallow it.
- */
- SET_MODULE_OWNER(&xlvbd_block_fops);
- MOD_INC_USE_COUNT;
-
- /* Initialize the global arrays. */
- for ( i = 0; i < 256; i++ )
- {
- /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
- xlide_blksize_size[i] = 1024;
- xlide_hardsect_size[i] = 512;
- xlide_max_sectors[i] = 128; /* 'hwif->rqsize' if we knew it */
-
- /* from the generic scsi disk code (drivers/scsi/sd.c) */
- xlscsi_blksize_size[i] = 1024; /* XXX 512; */
- xlscsi_hardsect_size[i] = 512;
- xlscsi_max_sectors[i] = 128*8; /* XXX 128; */
-
- /* we don't really know what to set these too since it depends */
- xlvbd_blksize_size[i] = 512;
- xlvbd_hardsect_size[i] = 512;
- xlvbd_max_sectors[i] = 128;
- }
-
- vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
- nr_vbds = xlvbd_get_vbd_info(vbd_info);
-
- if ( nr_vbds < 0 )
- {
- kfree(vbd_info);
- vbd_info = NULL;
- nr_vbds = 0;
- }
- else
- {
- for ( i = 0; i < nr_vbds; i++ )
- xlvbd_init_device(&vbd_info[i]);
- }
-
- return 0;
-}
-
-
-#ifdef MODULE
-module_init(xlvbd_init);
-#endif
+
O_TARGET := drv.o
-obj-y := vnetif.o
+
+subdir-y += frontend
+obj-y += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o
+
include $(TOPDIR)/Rules.make
--- /dev/null
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
--- /dev/null
+O_TARGET := drv.o
+obj-y := vnetif.o
+include $(TOPDIR)/Rules.make
--- /dev/null
+/******************************************************************************
+ * vnetif.c
+ *
+ * Virtual network driver for XenoLinux.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
+
+static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+/* Dynamically-mapped IRQs. */
+static int network_irq, debug_irq;
+
+static struct list_head dev_list;
+
+struct net_private
+{
+ struct list_head list;
+ struct net_device *dev;
+
+ struct net_device_stats stats;
+ NET_RING_IDX rx_resp_cons, tx_resp_cons;
+ unsigned int net_ring_fixmap_idx, tx_full;
+ net_ring_t *net_ring;
+ net_idx_t *net_idx;
+ spinlock_t tx_lock;
+ unsigned int idx; /* Domain-specific index of this VIF. */
+
+ unsigned int rx_bufs_to_notify;
+
+#define STATE_ACTIVE 0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED 2
+ unsigned int state;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+ * array is an index into a chain of free entries.
+ */
+ struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
+ struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
+};
+
+/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id) \
+ (_list)[(_id)] = (_list)[0]; \
+ (_list)[0] = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list) \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+ (_list)[0] = (_list)[_id]; \
+ (unsigned short)_id; })
+
+
+static void _dbg_network_int(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+
+ if ( np->state == STATE_CLOSED )
+ return;
+
+ printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
+ " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
+ " tx_event=0x%08x, state=%d\n",
+ np->tx_full, np->tx_resp_cons,
+ np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
+ np->net_idx->tx_event,
+ test_bit(__LINK_STATE_XOFF, &dev->state));
+ printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
+ " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
+ np->rx_resp_cons, np->net_idx->rx_req_prod,
+ np->net_idx->rx_resp_prod, np->net_idx->rx_event);
+}
+
+
+static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
+{
+ struct list_head *ent;
+ struct net_private *np;
+ list_for_each ( ent, &dev_list )
+ {
+ np = list_entry(ent, struct net_private, list);
+ _dbg_network_int(np->dev);
+ }
+}
+
+
+static int network_open(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+ netop_t netop;
+ int i, ret;
+
+ netop.cmd = NETOP_RESET_RINGS;
+ netop.vif = np->idx;
+ if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
+ {
+ printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
+ return ret;
+ }
+
+ netop.cmd = NETOP_GET_VIF_INFO;
+ netop.vif = np->idx;
+ if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
+ {
+ printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
+ return ret;
+ }
+
+ memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
+
+ set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx,
+ netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
+ np->net_ring = (net_ring_t *)fix_to_virt(
+ FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+ np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx];
+
+ np->rx_bufs_to_notify = 0;
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+ memset(&np->stats, 0, sizeof(np->stats));
+ spin_lock_init(&np->tx_lock);
+ memset(np->net_ring, 0, sizeof(*np->net_ring));
+ memset(np->net_idx, 0, sizeof(*np->net_idx));
+
+ /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+ for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
+ np->tx_skbs[i] = (void *)(i+1);
+ for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
+ np->rx_skbs[i] = (void *)(i+1);
+
+ wmb();
+ np->state = STATE_ACTIVE;
+
+ network_alloc_rx_buffers(dev);
+
+ netif_start_queue(dev);
+
+ MOD_INC_USE_COUNT;
+
+ return 0;
+}
+
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+ NET_RING_IDX i, prod;
+ unsigned short id;
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+ tx_entry_t *tx_ring = np->net_ring->tx_ring;
+
+ do {
+ prod = np->net_idx->tx_resp_prod;
+
+ for ( i = np->tx_resp_cons; i != prod; i++ )
+ {
+ id = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
+ skb = np->tx_skbs[id];
+ ADD_ID_TO_FREELIST(np->tx_skbs, id);
+ dev_kfree_skb_any(skb);
+ }
+
+ np->tx_resp_cons = prod;
+
+ /*
+ * Set a new event, then check for race with update of tx_cons. Note
+ * that it is essential to schedule a callback, no matter how few
+ * buffers are pending. Even if there is space in the transmit ring,
+ * higher layers may be blocked because too much data is outstanding:
+ * in such cases notification from Xen is likely to be the only kick
+ * that we'll get.
+ */
+ np->net_idx->tx_event =
+ prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
+ mb();
+ }
+ while ( prod != np->net_idx->tx_resp_prod );
+
+ if ( np->tx_full &&
+ ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
+ {
+ np->tx_full = 0;
+ if ( np->state == STATE_ACTIVE )
+ netif_wake_queue(dev);
+ }
+}
+
+
+static inline pte_t *get_ppte(void *addr)
+{
+ pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+ pgd = pgd_offset_k( (unsigned long)addr);
+ pmd = pmd_offset(pgd, (unsigned long)addr);
+ pte = pte_offset(pmd, (unsigned long)addr);
+ return pte;
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+ unsigned short id;
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+ netop_t netop;
+ NET_RING_IDX i = np->net_idx->rx_req_prod;
+
+ if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) ||
+ unlikely(np->state != STATE_ACTIVE) )
+ return;
+
+ do {
+ skb = dev_alloc_skb(RX_BUF_SIZE);
+ if ( unlikely(skb == NULL) )
+ break;
+
+ skb->dev = dev;
+
+ if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
+ panic("alloc_skb needs to provide us page-aligned buffers.");
+
+ id = GET_ID_FROM_FREELIST(np->rx_skbs);
+ np->rx_skbs[id] = skb;
+
+ np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id;
+ np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr =
+ virt_to_machine(get_ppte(skb->head));
+
+ np->rx_bufs_to_notify++;
+ }
+ while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
+
+ /*
+ * We may have allocated buffers which have entries outstanding in the page
+ * update queue -- make sure we flush those first!
+ */
+ flush_page_update_queue();
+
+ np->net_idx->rx_req_prod = i;
+ np->net_idx->rx_event = np->rx_resp_cons + 1;
+
+ /* Batch Xen notifications. */
+ if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
+ {
+ netop.cmd = NETOP_PUSH_BUFFERS;
+ netop.vif = np->idx;
+ (void)HYPERVISOR_net_io_op(&netop);
+ np->rx_bufs_to_notify = 0;
+ }
+}
+
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ unsigned short id;
+ struct net_private *np = (struct net_private *)dev->priv;
+ tx_req_entry_t *tx;
+ netop_t netop;
+ NET_RING_IDX i;
+
+ if ( unlikely(np->tx_full) )
+ {
+ printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
+ netif_stop_queue(dev);
+ return -ENOBUFS;
+ }
+
+ if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+ PAGE_SIZE) )
+ {
+ struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
+ if ( unlikely(new_skb == NULL) )
+ return 1;
+ skb_put(new_skb, skb->len);
+ memcpy(new_skb->data, skb->data, skb->len);
+ dev_kfree_skb(skb);
+ skb = new_skb;
+ }
+
+ spin_lock_irq(&np->tx_lock);
+
+ i = np->net_idx->tx_req_prod;
+
+ id = GET_ID_FROM_FREELIST(np->tx_skbs);
+ np->tx_skbs[id] = skb;
+
+ tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
+
+ tx->id = id;
+ tx->addr = phys_to_machine(virt_to_phys(skb->data));
+ tx->size = skb->len;
+
+ wmb();
+ np->net_idx->tx_req_prod = i + 1;
+
+ network_tx_buf_gc(dev);
+
+ if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
+ {
+ np->tx_full = 1;
+ netif_stop_queue(dev);
+ }
+
+ spin_unlock_irq(&np->tx_lock);
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+
+ /* Only notify Xen if there are no outstanding responses. */
+ mb();
+ if ( np->net_idx->tx_resp_prod == i )
+ {
+ netop.cmd = NETOP_PUSH_BUFFERS;
+ netop.vif = np->idx;
+ (void)HYPERVISOR_net_io_op(&netop);
+ }
+
+ return 0;
+}
+
+
+static inline void _network_interrupt(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+ unsigned long flags;
+ struct sk_buff *skb;
+ rx_resp_entry_t *rx;
+ NET_RING_IDX i;
+
+ if ( unlikely(np->state == STATE_CLOSED) )
+ return;
+
+ spin_lock_irqsave(&np->tx_lock, flags);
+ network_tx_buf_gc(dev);
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ again:
+ for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
+ {
+ rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
+
+ skb = np->rx_skbs[rx->id];
+ ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+ if ( unlikely(rx->status != RING_STATUS_OK) )
+ {
+ /* Gate this error. We get a (valid) slew of them on suspend. */
+ if ( np->state == STATE_ACTIVE )
+ printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ /*
+ * Set up shinfo -- from alloc_skb This was particularily nasty: the
+ * shared info is hidden at the back of the data area (presumably so it
+ * can be shared), but on page flip it gets very spunked.
+ */
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
+ (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
+
+ skb->data = skb->tail = skb->head + rx->offset;
+ skb_put(skb, rx->size);
+ skb->protocol = eth_type_trans(skb, dev);
+
+ np->stats.rx_packets++;
+
+ np->stats.rx_bytes += rx->size;
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ }
+
+ np->rx_resp_cons = i;
+
+ network_alloc_rx_buffers(dev);
+
+ /* Deal with hypervisor racing our resetting of rx_event. */
+ mb();
+ if ( np->net_idx->rx_resp_prod != i )
+ goto again;
+}
+
+
+static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
+{
+ struct list_head *ent;
+ struct net_private *np;
+ list_for_each ( ent, &dev_list )
+ {
+ np = list_entry(ent, struct net_private, list);
+ _network_interrupt(np->dev);
+ }
+}
+
+
+static int network_close(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+ netop_t netop;
+
+ np->state = STATE_SUSPENDED;
+ wmb();
+
+ netif_stop_queue(np->dev);
+
+ netop.cmd = NETOP_FLUSH_BUFFERS;
+ netop.vif = np->idx;
+ (void)HYPERVISOR_net_io_op(&netop);
+
+ while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
+ (np->tx_resp_cons != np->net_idx->tx_req_prod) )
+ {
+ barrier();
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+
+ wmb();
+ np->state = STATE_CLOSED;
+ wmb();
+
+ /* Now no longer safe to take interrupts for this device. */
+ clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+
+ MOD_DEC_USE_COUNT;
+
+ return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+ struct net_private *np = (struct net_private *)dev->priv;
+ return &np->stats;
+}
+
+
+static int __init init_module(void)
+{
+#if 0
+ int i, fixmap_idx=-1, err;
+ struct net_device *dev;
+ struct net_private *np;
+ netop_t netop;
+
+ INIT_LIST_HEAD(&dev_list);
+
+ network_irq = bind_virq_to_irq(VIRQ_NET);
+ debug_irq = bind_virq_to_irq(VIRQ_DEBUG);
+
+ err = request_irq(network_irq, network_interrupt,
+ SA_SAMPLE_RANDOM, "network", NULL);
+ if ( err )
+ {
+ printk(KERN_WARNING "Could not allocate network interrupt\n");
+ goto fail;
+ }
+
+ err = request_irq(debug_irq, dbg_network_int,
+ SA_SHIRQ, "net_dbg", &dbg_network_int);
+ if ( err )
+ printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
+
+ for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+ {
+ /* If the VIF is invalid then the query hypercall will fail. */
+ netop.cmd = NETOP_GET_VIF_INFO;
+ netop.vif = i;
+ if ( HYPERVISOR_net_io_op(&netop) != 0 )
+ continue;
+
+ /* We actually only support up to 4 vifs right now. */
+ if ( ++fixmap_idx == 4 )
+ break;
+
+ dev = alloc_etherdev(sizeof(struct net_private));
+ if ( dev == NULL )
+ {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ np = dev->priv;
+ np->state = STATE_CLOSED;
+ np->net_ring_fixmap_idx = fixmap_idx;
+ np->idx = i;
+
+ SET_MODULE_OWNER(dev);
+ dev->open = network_open;
+ dev->hard_start_xmit = network_start_xmit;
+ dev->stop = network_close;
+ dev->get_stats = network_get_stats;
+
+ memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
+
+ if ( (err = register_netdev(dev)) != 0 )
+ {
+ kfree(dev);
+ goto fail;
+ }
+
+ np->dev = dev;
+ list_add(&np->list, &dev_list);
+ }
+
+ return 0;
+
+ fail:
+ cleanup_module();
+ return err;
+#endif
+ return 0;
+}
+
+
+static void cleanup_module(void)
+{
+ struct net_private *np;
+ struct net_device *dev;
+
+ while ( !list_empty(&dev_list) )
+ {
+ np = list_entry(dev_list.next, struct net_private, list);
+ list_del(&np->list);
+ dev = np->dev;
+ unregister_netdev(dev);
+ kfree(dev);
+ }
+
+ free_irq(network_irq, NULL);
+ free_irq(debug_irq, NULL);
+
+ unbind_virq_from_irq(VIRQ_NET);
+ unbind_virq_from_irq(VIRQ_DEBUG);
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
+++ /dev/null
-/******************************************************************************
- * vnetif.c
- *
- * Virtual network driver for XenoLinux.
- *
- * Copyright (c) 2002-2004, K A Fraser
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
-
-static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
-static void network_tx_buf_gc(struct net_device *dev);
-static void network_alloc_rx_buffers(struct net_device *dev);
-static void cleanup_module(void);
-
-/* Dynamically-mapped IRQs. */
-static int network_irq, debug_irq;
-
-static struct list_head dev_list;
-
-struct net_private
-{
- struct list_head list;
- struct net_device *dev;
-
- struct net_device_stats stats;
- NET_RING_IDX rx_resp_cons, tx_resp_cons;
- unsigned int net_ring_fixmap_idx, tx_full;
- net_ring_t *net_ring;
- net_idx_t *net_idx;
- spinlock_t tx_lock;
- unsigned int idx; /* Domain-specific index of this VIF. */
-
- unsigned int rx_bufs_to_notify;
-
-#define STATE_ACTIVE 0
-#define STATE_SUSPENDED 1
-#define STATE_CLOSED 2
- unsigned int state;
-
- /*
- * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
- * array is an index into a chain of free entries.
- */
- struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
- struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
-};
-
-/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
-#define ADD_ID_TO_FREELIST(_list, _id) \
- (_list)[(_id)] = (_list)[0]; \
- (_list)[0] = (void *)(unsigned long)(_id);
-#define GET_ID_FROM_FREELIST(_list) \
- ({ unsigned long _id = (unsigned long)(_list)[0]; \
- (_list)[0] = (_list)[_id]; \
- (unsigned short)_id; })
-
-
-static void _dbg_network_int(struct net_device *dev)
-{
- struct net_private *np = dev->priv;
-
- if ( np->state == STATE_CLOSED )
- return;
-
- printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
- " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
- " tx_event=0x%08x, state=%d\n",
- np->tx_full, np->tx_resp_cons,
- np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
- np->net_idx->tx_event,
- test_bit(__LINK_STATE_XOFF, &dev->state));
- printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
- " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
- np->rx_resp_cons, np->net_idx->rx_req_prod,
- np->net_idx->rx_resp_prod, np->net_idx->rx_event);
-}
-
-
-static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
-{
- struct list_head *ent;
- struct net_private *np;
- list_for_each ( ent, &dev_list )
- {
- np = list_entry(ent, struct net_private, list);
- _dbg_network_int(np->dev);
- }
-}
-
-
-static int network_open(struct net_device *dev)
-{
- struct net_private *np = dev->priv;
- netop_t netop;
- int i, ret;
-
- netop.cmd = NETOP_RESET_RINGS;
- netop.vif = np->idx;
- if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
- {
- printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
- return ret;
- }
-
- netop.cmd = NETOP_GET_VIF_INFO;
- netop.vif = np->idx;
- if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
- {
- printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
- return ret;
- }
-
- memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
-
- set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx,
- netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
- np->net_ring = (net_ring_t *)fix_to_virt(
- FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
- np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx];
-
- np->rx_bufs_to_notify = 0;
- np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
- memset(&np->stats, 0, sizeof(np->stats));
- spin_lock_init(&np->tx_lock);
- memset(np->net_ring, 0, sizeof(*np->net_ring));
- memset(np->net_idx, 0, sizeof(*np->net_idx));
-
- /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
- for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
- np->tx_skbs[i] = (void *)(i+1);
- for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
- np->rx_skbs[i] = (void *)(i+1);
-
- wmb();
- np->state = STATE_ACTIVE;
-
- network_alloc_rx_buffers(dev);
-
- netif_start_queue(dev);
-
- MOD_INC_USE_COUNT;
-
- return 0;
-}
-
-
-static void network_tx_buf_gc(struct net_device *dev)
-{
- NET_RING_IDX i, prod;
- unsigned short id;
- struct net_private *np = dev->priv;
- struct sk_buff *skb;
- tx_entry_t *tx_ring = np->net_ring->tx_ring;
-
- do {
- prod = np->net_idx->tx_resp_prod;
-
- for ( i = np->tx_resp_cons; i != prod; i++ )
- {
- id = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
- skb = np->tx_skbs[id];
- ADD_ID_TO_FREELIST(np->tx_skbs, id);
- dev_kfree_skb_any(skb);
- }
-
- np->tx_resp_cons = prod;
-
- /*
- * Set a new event, then check for race with update of tx_cons. Note
- * that it is essential to schedule a callback, no matter how few
- * buffers are pending. Even if there is space in the transmit ring,
- * higher layers may be blocked because too much data is outstanding:
- * in such cases notification from Xen is likely to be the only kick
- * that we'll get.
- */
- np->net_idx->tx_event =
- prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
- mb();
- }
- while ( prod != np->net_idx->tx_resp_prod );
-
- if ( np->tx_full &&
- ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
- {
- np->tx_full = 0;
- if ( np->state == STATE_ACTIVE )
- netif_wake_queue(dev);
- }
-}
-
-
-static inline pte_t *get_ppte(void *addr)
-{
- pgd_t *pgd; pmd_t *pmd; pte_t *pte;
- pgd = pgd_offset_k( (unsigned long)addr);
- pmd = pmd_offset(pgd, (unsigned long)addr);
- pte = pte_offset(pmd, (unsigned long)addr);
- return pte;
-}
-
-
-static void network_alloc_rx_buffers(struct net_device *dev)
-{
- unsigned short id;
- struct net_private *np = dev->priv;
- struct sk_buff *skb;
- netop_t netop;
- NET_RING_IDX i = np->net_idx->rx_req_prod;
-
- if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) ||
- unlikely(np->state != STATE_ACTIVE) )
- return;
-
- do {
- skb = dev_alloc_skb(RX_BUF_SIZE);
- if ( unlikely(skb == NULL) )
- break;
-
- skb->dev = dev;
-
- if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
- panic("alloc_skb needs to provide us page-aligned buffers.");
-
- id = GET_ID_FROM_FREELIST(np->rx_skbs);
- np->rx_skbs[id] = skb;
-
- np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id;
- np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr =
- virt_to_machine(get_ppte(skb->head));
-
- np->rx_bufs_to_notify++;
- }
- while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
-
- /*
- * We may have allocated buffers which have entries outstanding in the page
- * update queue -- make sure we flush those first!
- */
- flush_page_update_queue();
-
- np->net_idx->rx_req_prod = i;
- np->net_idx->rx_event = np->rx_resp_cons + 1;
-
- /* Batch Xen notifications. */
- if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
- {
- netop.cmd = NETOP_PUSH_BUFFERS;
- netop.vif = np->idx;
- (void)HYPERVISOR_net_io_op(&netop);
- np->rx_bufs_to_notify = 0;
- }
-}
-
-
-static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- unsigned short id;
- struct net_private *np = (struct net_private *)dev->priv;
- tx_req_entry_t *tx;
- netop_t netop;
- NET_RING_IDX i;
-
- if ( unlikely(np->tx_full) )
- {
- printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
- netif_stop_queue(dev);
- return -ENOBUFS;
- }
-
- if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
- PAGE_SIZE) )
- {
- struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
- if ( unlikely(new_skb == NULL) )
- return 1;
- skb_put(new_skb, skb->len);
- memcpy(new_skb->data, skb->data, skb->len);
- dev_kfree_skb(skb);
- skb = new_skb;
- }
-
- spin_lock_irq(&np->tx_lock);
-
- i = np->net_idx->tx_req_prod;
-
- id = GET_ID_FROM_FREELIST(np->tx_skbs);
- np->tx_skbs[id] = skb;
-
- tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
-
- tx->id = id;
- tx->addr = phys_to_machine(virt_to_phys(skb->data));
- tx->size = skb->len;
-
- wmb();
- np->net_idx->tx_req_prod = i + 1;
-
- network_tx_buf_gc(dev);
-
- if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
- {
- np->tx_full = 1;
- netif_stop_queue(dev);
- }
-
- spin_unlock_irq(&np->tx_lock);
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
-
- /* Only notify Xen if there are no outstanding responses. */
- mb();
- if ( np->net_idx->tx_resp_prod == i )
- {
- netop.cmd = NETOP_PUSH_BUFFERS;
- netop.vif = np->idx;
- (void)HYPERVISOR_net_io_op(&netop);
- }
-
- return 0;
-}
-
-
-static inline void _network_interrupt(struct net_device *dev)
-{
- struct net_private *np = dev->priv;
- unsigned long flags;
- struct sk_buff *skb;
- rx_resp_entry_t *rx;
- NET_RING_IDX i;
-
- if ( unlikely(np->state == STATE_CLOSED) )
- return;
-
- spin_lock_irqsave(&np->tx_lock, flags);
- network_tx_buf_gc(dev);
- spin_unlock_irqrestore(&np->tx_lock, flags);
-
- again:
- for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
- {
- rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
-
- skb = np->rx_skbs[rx->id];
- ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
-
- if ( unlikely(rx->status != RING_STATUS_OK) )
- {
- /* Gate this error. We get a (valid) slew of them on suspend. */
- if ( np->state == STATE_ACTIVE )
- printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
- dev_kfree_skb_any(skb);
- continue;
- }
-
- /*
- * Set up shinfo -- from alloc_skb This was particularily nasty: the
- * shared info is hidden at the back of the data area (presumably so it
- * can be shared), but on page flip it gets very spunked.
- */
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->frag_list = NULL;
-
- phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
- (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
-
- skb->data = skb->tail = skb->head + rx->offset;
- skb_put(skb, rx->size);
- skb->protocol = eth_type_trans(skb, dev);
-
- np->stats.rx_packets++;
-
- np->stats.rx_bytes += rx->size;
- netif_rx(skb);
- dev->last_rx = jiffies;
- }
-
- np->rx_resp_cons = i;
-
- network_alloc_rx_buffers(dev);
-
- /* Deal with hypervisor racing our resetting of rx_event. */
- mb();
- if ( np->net_idx->rx_resp_prod != i )
- goto again;
-}
-
-
-static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
-{
- struct list_head *ent;
- struct net_private *np;
- list_for_each ( ent, &dev_list )
- {
- np = list_entry(ent, struct net_private, list);
- _network_interrupt(np->dev);
- }
-}
-
-
-static int network_close(struct net_device *dev)
-{
- struct net_private *np = dev->priv;
- netop_t netop;
-
- np->state = STATE_SUSPENDED;
- wmb();
-
- netif_stop_queue(np->dev);
-
- netop.cmd = NETOP_FLUSH_BUFFERS;
- netop.vif = np->idx;
- (void)HYPERVISOR_net_io_op(&netop);
-
- while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
- (np->tx_resp_cons != np->net_idx->tx_req_prod) )
- {
- barrier();
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
- }
-
- wmb();
- np->state = STATE_CLOSED;
- wmb();
-
- /* Now no longer safe to take interrupts for this device. */
- clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
-
- MOD_DEC_USE_COUNT;
-
- return 0;
-}
-
-
-static struct net_device_stats *network_get_stats(struct net_device *dev)
-{
- struct net_private *np = (struct net_private *)dev->priv;
- return &np->stats;
-}
-
-
-static int __init init_module(void)
-{
-#if 0
- int i, fixmap_idx=-1, err;
- struct net_device *dev;
- struct net_private *np;
- netop_t netop;
-
- INIT_LIST_HEAD(&dev_list);
-
- network_irq = bind_virq_to_irq(VIRQ_NET);
- debug_irq = bind_virq_to_irq(VIRQ_DEBUG);
-
- err = request_irq(network_irq, network_interrupt,
- SA_SAMPLE_RANDOM, "network", NULL);
- if ( err )
- {
- printk(KERN_WARNING "Could not allocate network interrupt\n");
- goto fail;
- }
-
- err = request_irq(debug_irq, dbg_network_int,
- SA_SHIRQ, "net_dbg", &dbg_network_int);
- if ( err )
- printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
-
- for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
- {
- /* If the VIF is invalid then the query hypercall will fail. */
- netop.cmd = NETOP_GET_VIF_INFO;
- netop.vif = i;
- if ( HYPERVISOR_net_io_op(&netop) != 0 )
- continue;
-
- /* We actually only support up to 4 vifs right now. */
- if ( ++fixmap_idx == 4 )
- break;
-
- dev = alloc_etherdev(sizeof(struct net_private));
- if ( dev == NULL )
- {
- err = -ENOMEM;
- goto fail;
- }
-
- np = dev->priv;
- np->state = STATE_CLOSED;
- np->net_ring_fixmap_idx = fixmap_idx;
- np->idx = i;
-
- SET_MODULE_OWNER(dev);
- dev->open = network_open;
- dev->hard_start_xmit = network_start_xmit;
- dev->stop = network_close;
- dev->get_stats = network_get_stats;
-
- memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
-
- if ( (err = register_netdev(dev)) != 0 )
- {
- kfree(dev);
- goto fail;
- }
-
- np->dev = dev;
- list_add(&np->list, &dev_list);
- }
-
- return 0;
-
- fail:
- cleanup_module();
- return err;
-#endif
- return 0;
-}
-
-
-static void cleanup_module(void)
-{
- struct net_private *np;
- struct net_device *dev;
-
- while ( !list_empty(&dev_list) )
- {
- np = list_entry(dev_list.next, struct net_private, list);
- list_del(&np->list);
- dev = np->dev;
- unregister_netdev(dev);
- kfree(dev);
- }
-
- free_irq(network_irq, NULL);
- free_irq(debug_irq, NULL);
-
- unbind_virq_from_irq(VIRQ_NET);
- unbind_virq_from_irq(VIRQ_DEBUG);
-}
-
-
-module_init(init_module);
-module_exit(cleanup_module);